{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "import random\n",
    "#中商产业研究院网址\n",
    "url = 'http://s.askci.com/stock/a/?reportTime=2018-09-30&pageNum=%d'\n",
    "#伪装浏览器\n",
    "headers = {\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \\\n",
    "    AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',\n",
    "    'Referer': 'http://s.askci.com/stock/a/?reportTime=2018-09-30&pageNum=178'\n",
    "}\n",
    "IPs = [{'HTTP': 'HTTP://118.190.95.35:9001'},\n",
    "       {'HTTP': 'HTTP://61.135.217.7:80'},\n",
    "       {'HTTP': 'HTTP://116.1.11.19:80'}]\n",
    "for i in range(1, 179):\n",
    "    # 返回整个网页中的表格列表\n",
    "    dflist = pd.read_html(requests.get(url % i, \n",
    "            headers=headers, proxies=random.choice(IPs)).text)\n",
    "    # 经观察可以发现是第四个就是我们想要的表格\n",
    "    df = dflist[3]\n",
    "    df.to_csv('stock_information.csv', mode='a', \n",
    "              header=1 if i == 1 else None, \n",
    "              index=0, encoding='utf_8_sig')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#先引入后面可能用到的包（package）\n",
    "import pandas as pd  \n",
    "import numpy as np\n",
    "from scipy import stats\n",
    "import tushare as ts \n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline   \n",
    "\n",
    "#正常显示画图时出现的中文和负号\n",
    "from pylab import mpl\n",
    "mpl.rcParams['font.sans-serif']=['SimHei']\n",
    "mpl.rcParams['axes.unicode_minus']=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入数据\n",
    "df=pd.read_csv('stock_information.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>股票代码</th>\n",
       "      <th>股票简称</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>省份</th>\n",
       "      <th>城市</th>\n",
       "      <th>主营业务收入(201809)</th>\n",
       "      <th>净利润(201809)</th>\n",
       "      <th>员工人数</th>\n",
       "      <th>上市日期</th>\n",
       "      <th>招股书</th>\n",
       "      <th>公司财报</th>\n",
       "      <th>行业分类</th>\n",
       "      <th>产品类型</th>\n",
       "      <th>主营业务</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>平安银行</td>\n",
       "      <td>平安银行股份有限公司</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>罗湖区</td>\n",
       "      <td>8666400.0</td>\n",
       "      <td>2045600.0</td>\n",
       "      <td>32744</td>\n",
       "      <td>1991-04-03</td>\n",
       "      <td>--</td>\n",
       "      <td>NaN</td>\n",
       "      <td>银行</td>\n",
       "      <td>商业银行业务</td>\n",
       "      <td>经有关监管机构批准的各项商业银行业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>万科A</td>\n",
       "      <td>万科企业股份有限公司</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>盐田区</td>\n",
       "      <td>17602213.92</td>\n",
       "      <td>1398455.98</td>\n",
       "      <td>92764</td>\n",
       "      <td>1991-01-29</td>\n",
       "      <td>--</td>\n",
       "      <td>NaN</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>房地产、物业管理、投资咨询</td>\n",
       "      <td>房地产开发和物业服务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>国农科技</td>\n",
       "      <td>深圳中国农大科技股份有限公司</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>南山区</td>\n",
       "      <td>25617.86</td>\n",
       "      <td>-259.18</td>\n",
       "      <td>172</td>\n",
       "      <td>1991-01-14</td>\n",
       "      <td>--</td>\n",
       "      <td>NaN</td>\n",
       "      <td>生物医药</td>\n",
       "      <td>生物医药业务</td>\n",
       "      <td>生物制药</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>世纪星源</td>\n",
       "      <td>深圳世纪星源股份有限公司</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>罗湖区</td>\n",
       "      <td>38300.0</td>\n",
       "      <td>653.2</td>\n",
       "      <td>705</td>\n",
       "      <td>1990-12-10</td>\n",
       "      <td>--</td>\n",
       "      <td>NaN</td>\n",
       "      <td>环保工程、物业管理</td>\n",
       "      <td>酒店经营、物业管理收入、环保业务收入</td>\n",
       "      <td>绿色低碳城市社区建设相关的服务业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>深振业A</td>\n",
       "      <td>深圳市振业(集团)股份有限公司</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>罗湖区</td>\n",
       "      <td>159389.24</td>\n",
       "      <td>50324.01</td>\n",
       "      <td>370</td>\n",
       "      <td>1992-04-27</td>\n",
       "      <td>--</td>\n",
       "      <td>NaN</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>物业租赁、房产销售</td>\n",
       "      <td>从事房地产开发与销售</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   序号  股票代码  股票简称             公司名称   省份   城市 主营业务收入(201809) 净利润(201809)  \\\n",
       "0   1     1  平安银行       平安银行股份有限公司  深圳市  罗湖区      8666400.0   2045600.0   \n",
       "1   2     2   万科A       万科企业股份有限公司  深圳市  盐田区    17602213.92  1398455.98   \n",
       "2   3     4  国农科技   深圳中国农大科技股份有限公司  深圳市  南山区       25617.86     -259.18   \n",
       "3   4     5  世纪星源     深圳世纪星源股份有限公司  深圳市  罗湖区        38300.0       653.2   \n",
       "4   5     6  深振业A  深圳市振业(集团)股份有限公司  深圳市  罗湖区      159389.24    50324.01   \n",
       "\n",
       "    员工人数        上市日期 招股书  公司财报       行业分类                产品类型  \\\n",
       "0  32744  1991-04-03  --   NaN         银行              商业银行业务   \n",
       "1  92764  1991-01-29  --   NaN      房地产开发       房地产、物业管理、投资咨询   \n",
       "2    172  1991-01-14  --   NaN       生物医药              生物医药业务   \n",
       "3    705  1990-12-10  --   NaN  环保工程、物业管理  酒店经营、物业管理收入、环保业务收入   \n",
       "4    370  1992-04-27  --   NaN      房地产开发           物业租赁、房产销售   \n",
       "\n",
       "                 主营业务  \n",
       "0  经有关监管机构批准的各项商业银行业务  \n",
       "1          房地产开发和物业服务  \n",
       "2                生物制药  \n",
       "3   绿色低碳城市社区建设相关的服务业务  \n",
       "4          从事房地产开发与销售  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数据基本情况\n",
    "df.head()\n",
    "#利润和主营业务收入百万元"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>股票简称</th>\n",
       "      <th>省份</th>\n",
       "      <th>主营业务收入(201809)</th>\n",
       "      <th>净利润(201809)</th>\n",
       "      <th>员工人数</th>\n",
       "      <th>上市日期</th>\n",
       "      <th>行业分类</th>\n",
       "      <th>产品类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>平安银行</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>8666400.0</td>\n",
       "      <td>2045600.0</td>\n",
       "      <td>32744</td>\n",
       "      <td>1991-04-03</td>\n",
       "      <td>银行</td>\n",
       "      <td>商业银行业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>万科A</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>17602213.92</td>\n",
       "      <td>1398455.98</td>\n",
       "      <td>92764</td>\n",
       "      <td>1991-01-29</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>房地产、物业管理、投资咨询</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>国农科技</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>25617.86</td>\n",
       "      <td>-259.18</td>\n",
       "      <td>172</td>\n",
       "      <td>1991-01-14</td>\n",
       "      <td>生物医药</td>\n",
       "      <td>生物医药业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>世纪星源</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>38300.0</td>\n",
       "      <td>653.2</td>\n",
       "      <td>705</td>\n",
       "      <td>1990-12-10</td>\n",
       "      <td>环保工程、物业管理</td>\n",
       "      <td>酒店经营、物业管理收入、环保业务收入</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>深振业A</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>159389.24</td>\n",
       "      <td>50324.01</td>\n",
       "      <td>370</td>\n",
       "      <td>1992-04-27</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>物业租赁、房产销售</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   股票简称   省份 主营业务收入(201809) 净利润(201809)   员工人数        上市日期       行业分类  \\\n",
       "0  平安银行  深圳市      8666400.0   2045600.0  32744  1991-04-03         银行   \n",
       "1   万科A  深圳市    17602213.92  1398455.98  92764  1991-01-29      房地产开发   \n",
       "2  国农科技  深圳市       25617.86     -259.18    172  1991-01-14       生物医药   \n",
       "3  世纪星源  深圳市        38300.0       653.2    705  1990-12-10  环保工程、物业管理   \n",
       "4  深振业A  深圳市      159389.24    50324.01    370  1992-04-27      房地产开发   \n",
       "\n",
       "                 产品类型  \n",
       "0              商业银行业务  \n",
       "1       房地产、物业管理、投资咨询  \n",
       "2              生物医药业务  \n",
       "3  酒店经营、物业管理收入、环保业务收入  \n",
       "4           物业租赁、房产销售  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#保留需要的特征\n",
    "df=df.iloc[:,[2,4,6,7,8,9,12,13]]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>股票简称</th>\n",
       "      <th>城市</th>\n",
       "      <th>主营业务收入</th>\n",
       "      <th>净利润</th>\n",
       "      <th>员工人数</th>\n",
       "      <th>上市日期</th>\n",
       "      <th>行业分类</th>\n",
       "      <th>产品类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>平安银行</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>8666400.0</td>\n",
       "      <td>2045600.0</td>\n",
       "      <td>32744</td>\n",
       "      <td>1991-04-03</td>\n",
       "      <td>银行</td>\n",
       "      <td>商业银行业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>万科A</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>17602213.92</td>\n",
       "      <td>1398455.98</td>\n",
       "      <td>92764</td>\n",
       "      <td>1991-01-29</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>房地产、物业管理、投资咨询</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>国农科技</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>25617.86</td>\n",
       "      <td>-259.18</td>\n",
       "      <td>172</td>\n",
       "      <td>1991-01-14</td>\n",
       "      <td>生物医药</td>\n",
       "      <td>生物医药业务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>世纪星源</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>38300.0</td>\n",
       "      <td>653.2</td>\n",
       "      <td>705</td>\n",
       "      <td>1990-12-10</td>\n",
       "      <td>环保工程、物业管理</td>\n",
       "      <td>酒店经营、物业管理收入、环保业务收入</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>深振业A</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>159389.24</td>\n",
       "      <td>50324.01</td>\n",
       "      <td>370</td>\n",
       "      <td>1992-04-27</td>\n",
       "      <td>房地产开发</td>\n",
       "      <td>物业租赁、房产销售</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   股票简称   城市       主营业务收入         净利润   员工人数        上市日期       行业分类  \\\n",
       "0  平安银行  深圳市    8666400.0   2045600.0  32744  1991-04-03         银行   \n",
       "1   万科A  深圳市  17602213.92  1398455.98  92764  1991-01-29      房地产开发   \n",
       "2  国农科技  深圳市     25617.86     -259.18    172  1991-01-14       生物医药   \n",
       "3  世纪星源  深圳市      38300.0       653.2    705  1990-12-10  环保工程、物业管理   \n",
       "4  深振业A  深圳市    159389.24    50324.01    370  1992-04-27      房地产开发   \n",
       "\n",
       "                 产品类型  \n",
       "0              商业银行业务  \n",
       "1       房地产、物业管理、投资咨询  \n",
       "2              生物医药业务  \n",
       "3  酒店经营、物业管理收入、环保业务收入  \n",
       "4           物业租赁、房产销售  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#列重命名\n",
    "df.rename(columns={'省份':'城市','主营业务收入(201809)':'主营业务收入', '净利润(201809)':'净利润'}, inplace = True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>股票简称</th>\n",
       "      <th>城市</th>\n",
       "      <th>主营业务收入</th>\n",
       "      <th>净利润</th>\n",
       "      <th>员工人数</th>\n",
       "      <th>上市日期</th>\n",
       "      <th>行业分类</th>\n",
       "      <th>产品类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1956</th>\n",
       "      <td>赛托生物</td>\n",
       "      <td>菏泽市</td>\n",
       "      <td>84714.02</td>\n",
       "      <td>9976.96</td>\n",
       "      <td>622</td>\n",
       "      <td>2017-01-06</td>\n",
       "      <td>化学原料药</td>\n",
       "      <td>甾体药物</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1227</th>\n",
       "      <td>普路通</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>454925.21</td>\n",
       "      <td>9976.3</td>\n",
       "      <td>447</td>\n",
       "      <td>2015-06-29</td>\n",
       "      <td>供应链管理</td>\n",
       "      <td>供应链管理服务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2421</th>\n",
       "      <td>红星发展</td>\n",
       "      <td>安顺市</td>\n",
       "      <td>114946.68</td>\n",
       "      <td>9975.43</td>\n",
       "      <td>2854</td>\n",
       "      <td>2001-03-20</td>\n",
       "      <td>无机盐</td>\n",
       "      <td>钡盐、锶盐、锰系产品</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2835</th>\n",
       "      <td>航天长峰</td>\n",
       "      <td>北京市</td>\n",
       "      <td>90120.49</td>\n",
       "      <td>996.96</td>\n",
       "      <td>1074</td>\n",
       "      <td>1994-04-25</td>\n",
       "      <td>专用计算机设备</td>\n",
       "      <td>安保科技、医疗器械、电子信息</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1115</th>\n",
       "      <td>利君股份</td>\n",
       "      <td>成都市</td>\n",
       "      <td>38252.72</td>\n",
       "      <td>9951.93</td>\n",
       "      <td>767</td>\n",
       "      <td>2012-01-06</td>\n",
       "      <td>矿山冶金机械</td>\n",
       "      <td>制造业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2502</th>\n",
       "      <td>中国动力</td>\n",
       "      <td>保定市</td>\n",
       "      <td>2014545.95</td>\n",
       "      <td>99464.51</td>\n",
       "      <td>19716</td>\n",
       "      <td>2004-07-14</td>\n",
       "      <td>船舶制造</td>\n",
       "      <td>制造业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2402</th>\n",
       "      <td>航天动力</td>\n",
       "      <td>西安市</td>\n",
       "      <td>88796.9</td>\n",
       "      <td>994.96</td>\n",
       "      <td>2483</td>\n",
       "      <td>2003-04-08</td>\n",
       "      <td>航天装备</td>\n",
       "      <td>泵及泵系统、液力变矩器、智能燃气表和智能数据模块、电机、建筑安装、化工生物装备、节能项目</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2845</th>\n",
       "      <td>百大集团</td>\n",
       "      <td>杭州市</td>\n",
       "      <td>52450.9</td>\n",
       "      <td>9935.14</td>\n",
       "      <td>290</td>\n",
       "      <td>1994-08-09</td>\n",
       "      <td>百货</td>\n",
       "      <td>商品销售、旅游服务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>541</th>\n",
       "      <td>沙钢股份</td>\n",
       "      <td>苏州市</td>\n",
       "      <td>1101955.32</td>\n",
       "      <td>99345.68</td>\n",
       "      <td>4339</td>\n",
       "      <td>2006-10-25</td>\n",
       "      <td>普钢</td>\n",
       "      <td>黑色金属冶炼、优特钢</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2886</th>\n",
       "      <td>湖南盐业</td>\n",
       "      <td>湖南</td>\n",
       "      <td>165223.94</td>\n",
       "      <td>9933.93</td>\n",
       "      <td>4472</td>\n",
       "      <td>2018-03-26</td>\n",
       "      <td>--</td>\n",
       "      <td>制盐、食用盐、工业盐、芒硝</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1022</th>\n",
       "      <td>巨人网络</td>\n",
       "      <td>重庆市</td>\n",
       "      <td>287965.48</td>\n",
       "      <td>99303.49</td>\n",
       "      <td>3917</td>\n",
       "      <td>2011-03-02</td>\n",
       "      <td>游戏</td>\n",
       "      <td>游戏相关业务、互联网金融服务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2922</th>\n",
       "      <td>开滦股份</td>\n",
       "      <td>唐山市</td>\n",
       "      <td>1544997.57</td>\n",
       "      <td>99110.88</td>\n",
       "      <td>10816</td>\n",
       "      <td>2004-06-02</td>\n",
       "      <td>煤炭开采洗选</td>\n",
       "      <td>煤化工</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1907</th>\n",
       "      <td>优博讯</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>69344.04</td>\n",
       "      <td>9891.86</td>\n",
       "      <td>581</td>\n",
       "      <td>2016-08-09</td>\n",
       "      <td>通信终端设备</td>\n",
       "      <td>智能移动终端</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2839</th>\n",
       "      <td>王府井</td>\n",
       "      <td>北京市</td>\n",
       "      <td>1919815.52</td>\n",
       "      <td>98883.82</td>\n",
       "      <td>10849</td>\n",
       "      <td>1994-05-06</td>\n",
       "      <td>百货</td>\n",
       "      <td>商品零售、商业物业出租</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2213</th>\n",
       "      <td>兰花科创</td>\n",
       "      <td>晋城市</td>\n",
       "      <td>631508.99</td>\n",
       "      <td>98844.48</td>\n",
       "      <td>18453</td>\n",
       "      <td>1998-12-17</td>\n",
       "      <td>煤炭开采洗选</td>\n",
       "      <td>无烟煤、焦煤、动力煤</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1293</th>\n",
       "      <td>华统股份</td>\n",
       "      <td>金华市</td>\n",
       "      <td>367104.25</td>\n",
       "      <td>9882.89</td>\n",
       "      <td>2018</td>\n",
       "      <td>2017-01-10</td>\n",
       "      <td>肉制品</td>\n",
       "      <td>饲料、生鲜猪肉、生鲜禽肉、火腿、酱卤</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2616</th>\n",
       "      <td>老凤祥</td>\n",
       "      <td>上海市</td>\n",
       "      <td>3650485.26</td>\n",
       "      <td>98780.13</td>\n",
       "      <td>2517</td>\n",
       "      <td>1992-08-14</td>\n",
       "      <td>珠宝首饰</td>\n",
       "      <td>笔类、珠宝首饰、黄金交易、工艺品销售、商贸</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1720</th>\n",
       "      <td>天银机电</td>\n",
       "      <td>苏州市</td>\n",
       "      <td>57716.19</td>\n",
       "      <td>9878.27</td>\n",
       "      <td>1097</td>\n",
       "      <td>2012-07-26</td>\n",
       "      <td>压缩机</td>\n",
       "      <td>家电零配件、军工电子</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1851</th>\n",
       "      <td>景嘉微</td>\n",
       "      <td>长沙市</td>\n",
       "      <td>29286.45</td>\n",
       "      <td>9868.1</td>\n",
       "      <td>479</td>\n",
       "      <td>2016-03-31</td>\n",
       "      <td>电子零部件</td>\n",
       "      <td>图形显控领域产品、小型专业化雷达</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2478</th>\n",
       "      <td>金证股份</td>\n",
       "      <td>深圳市</td>\n",
       "      <td>354202.53</td>\n",
       "      <td>9866.98</td>\n",
       "      <td>6019</td>\n",
       "      <td>2003-12-24</td>\n",
       "      <td>行业应用软件</td>\n",
       "      <td>计算机信息服务</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      股票简称   城市      主营业务收入       净利润   员工人数        上市日期     行业分类  \\\n",
       "1956  赛托生物  菏泽市    84714.02   9976.96    622  2017-01-06    化学原料药   \n",
       "1227   普路通  深圳市   454925.21    9976.3    447  2015-06-29    供应链管理   \n",
       "2421  红星发展  安顺市   114946.68   9975.43   2854  2001-03-20      无机盐   \n",
       "2835  航天长峰  北京市    90120.49    996.96   1074  1994-04-25  专用计算机设备   \n",
       "1115  利君股份  成都市    38252.72   9951.93    767  2012-01-06   矿山冶金机械   \n",
       "2502  中国动力  保定市  2014545.95  99464.51  19716  2004-07-14     船舶制造   \n",
       "2402  航天动力  西安市     88796.9    994.96   2483  2003-04-08     航天装备   \n",
       "2845  百大集团  杭州市     52450.9   9935.14    290  1994-08-09       百货   \n",
       "541   沙钢股份  苏州市  1101955.32  99345.68   4339  2006-10-25       普钢   \n",
       "2886  湖南盐业   湖南   165223.94   9933.93   4472  2018-03-26       --   \n",
       "1022  巨人网络  重庆市   287965.48  99303.49   3917  2011-03-02       游戏   \n",
       "2922  开滦股份  唐山市  1544997.57  99110.88  10816  2004-06-02   煤炭开采洗选   \n",
       "1907   优博讯  深圳市    69344.04   9891.86    581  2016-08-09   通信终端设备   \n",
       "2839   王府井  北京市  1919815.52  98883.82  10849  1994-05-06       百货   \n",
       "2213  兰花科创  晋城市   631508.99  98844.48  18453  1998-12-17   煤炭开采洗选   \n",
       "1293  华统股份  金华市   367104.25   9882.89   2018  2017-01-10      肉制品   \n",
       "2616   老凤祥  上海市  3650485.26  98780.13   2517  1992-08-14     珠宝首饰   \n",
       "1720  天银机电  苏州市    57716.19   9878.27   1097  2012-07-26      压缩机   \n",
       "1851   景嘉微  长沙市    29286.45    9868.1    479  2016-03-31    电子零部件   \n",
       "2478  金证股份  深圳市   354202.53   9866.98   6019  2003-12-24   行业应用软件   \n",
       "\n",
       "                                              产品类型  \n",
       "1956                                          甾体药物  \n",
       "1227                                       供应链管理服务  \n",
       "2421                                    钡盐、锶盐、锰系产品  \n",
       "2835                                安保科技、医疗器械、电子信息  \n",
       "1115                                           制造业  \n",
       "2502                                           制造业  \n",
       "2402  泵及泵系统、液力变矩器、智能燃气表和智能数据模块、电机、建筑安装、化工生物装备、节能项目  \n",
       "2845                                     商品销售、旅游服务  \n",
       "541                                     黑色金属冶炼、优特钢  \n",
       "2886                                 制盐、食用盐、工业盐、芒硝  \n",
       "1022                                游戏相关业务、互联网金融服务  \n",
       "2922                                           煤化工  \n",
       "1907                                        智能移动终端  \n",
       "2839                                   商品零售、商业物业出租  \n",
       "2213                                    无烟煤、焦煤、动力煤  \n",
       "1293                            饲料、生鲜猪肉、生鲜禽肉、火腿、酱卤  \n",
       "2616                         笔类、珠宝首饰、黄金交易、工艺品销售、商贸  \n",
       "1720                                    家电零配件、军工电子  \n",
       "1851                              图形显控领域产品、小型专业化雷达  \n",
       "2478                                       计算机信息服务  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values('净利润',ascending=False)[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
